In [1]:
# Importing the Libraries
import pandas as pd
import numpy as np
import plotly.express as px
In [2]:
# Loading the Dataset
data = pd.read_csv('deliverytime.txt')
In [3]:
# Analyse the Top 5 rows of the Dataset
data.head()
Out[3]:
| ID | Delivery_person_ID | Delivery_person_Age | Delivery_person_Ratings | Restaurant_latitude | Restaurant_longitude | Delivery_location_latitude | Delivery_location_longitude | Type_of_order | Type_of_vehicle | Time_taken(min) | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4607 | INDORES13DEL02 | 37 | 4.9 | 22.745049 | 75.892471 | 22.765049 | 75.912471 | Snack | motorcycle | 24 |
| 1 | B379 | BANGRES18DEL02 | 34 | 4.5 | 12.913041 | 77.683237 | 13.043041 | 77.813237 | Snack | scooter | 33 |
| 2 | 5D6D | BANGRES19DEL01 | 23 | 4.4 | 12.914264 | 77.678400 | 12.924264 | 77.688400 | Drinks | motorcycle | 26 |
| 3 | 7A6A | COIMBRES13DEL02 | 38 | 4.7 | 11.003669 | 76.976494 | 11.053669 | 77.026494 | Buffet | motorcycle | 21 |
| 4 | 70A2 | CHENRES12DEL01 | 32 | 4.6 | 12.972793 | 80.249982 | 13.012793 | 80.289982 | Snack | scooter | 30 |
In [4]:
# Check information about the Dataset
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 45593 entries, 0 to 45592 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 45593 non-null object 1 Delivery_person_ID 45593 non-null object 2 Delivery_person_Age 45593 non-null int64 3 Delivery_person_Ratings 45593 non-null float64 4 Restaurant_latitude 45593 non-null float64 5 Restaurant_longitude 45593 non-null float64 6 Delivery_location_latitude 45593 non-null float64 7 Delivery_location_longitude 45593 non-null float64 8 Type_of_order 45593 non-null object 9 Type_of_vehicle 45593 non-null object 10 Time_taken(min) 45593 non-null int64 dtypes: float64(5), int64(2), object(4) memory usage: 3.8+ MB
In [5]:
# Now let's check for any null values
data.isnull().sum()
Out[5]:
ID 0 Delivery_person_ID 0 Delivery_person_Age 0 Delivery_person_Ratings 0 Restaurant_latitude 0 Restaurant_longitude 0 Delivery_location_latitude 0 Delivery_location_longitude 0 Type_of_order 0 Type_of_vehicle 0 Time_taken(min) 0 dtype: int64
In [7]:
# Now let's check for any duplicated values
data.duplicated().sum()
Out[7]:
0
Calculating Distance Between Two Latitudes and Longitudes¶
The dataset doesn’t have any feature that shows the difference between the restaurant and the delivery location. All we have are the latitude and longitude points of the restaurant and the delivery location. We can use the haversine formula to calculate the distance between two locations based on their latitudes and longitudes¶
In [8]:
# Set the earth's radius (in kilometers)
R = 6371
# Convert degrees to radians
def deg_to_rad(degrees):
return degrees * (np.pi/180)
# Function to calculate distance between two points using the haversine formula
def distcalculate(lat1, lon1, lat2, lon2):
d_lat = deg_to_rad(lat2-lat1)
d_lon = deg_to_rad(lon2-lon1)
a = np.sin(d_lat/2)**2 + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2)) * np.sin(d_lon/2)**2
c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
return R * c
In [9]:
# Calculate the distance between each pair of points
data['distance'] = np.nan
In [11]:
for i in range(len(data)):
data.loc[i, 'distance'] = distcalculate(data.loc[i, 'Restaurant_latitude'],
data.loc[i, 'Restaurant_longitude'],
data.loc[i, 'Delivery_location_latitude'],
data.loc[i, 'Delivery_location_longitude'])
In [12]:
# Now let's check the new feature we have added in the dataset as distance
data.head()
Out[12]:
| ID | Delivery_person_ID | Delivery_person_Age | Delivery_person_Ratings | Restaurant_latitude | Restaurant_longitude | Delivery_location_latitude | Delivery_location_longitude | Type_of_order | Type_of_vehicle | Time_taken(min) | distance | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4607 | INDORES13DEL02 | 37 | 4.9 | 22.745049 | 75.892471 | 22.765049 | 75.912471 | Snack | motorcycle | 24 | 3.025149 |
| 1 | B379 | BANGRES18DEL02 | 34 | 4.5 | 12.913041 | 77.683237 | 13.043041 | 77.813237 | Snack | scooter | 33 | 20.183530 |
| 2 | 5D6D | BANGRES19DEL01 | 23 | 4.4 | 12.914264 | 77.678400 | 12.924264 | 77.688400 | Drinks | motorcycle | 26 | 1.552758 |
| 3 | 7A6A | COIMBRES13DEL02 | 38 | 4.7 | 11.003669 | 76.976494 | 11.053669 | 77.026494 | Buffet | motorcycle | 21 | 7.790401 |
| 4 | 70A2 | CHENRES12DEL01 | 32 | 4.6 | 12.972793 | 80.249982 | 13.012793 | 80.289982 | Snack | scooter | 30 | 6.210138 |
Data Exploration¶
In [13]:
figure = px.scatter(data_frame= data,
x = 'distance',
y = 'Time_taken(min)',
size = 'Time_taken(min)',
trendline = 'ols',
title = 'Relationship Between Distance and Time Taken')
figure.show()
In [14]:
# Now let’s have a look at the relationship between the time taken to deliver the food and the age of the delivery partner
figure = px.scatter(data_frame = data,
x="Delivery_person_Age",
y="Time_taken(min)",
size="Time_taken(min)",
color = "distance",
trendline="ols",
title = "Relationship Between Time Taken and Age")
figure.show()
In [15]:
# Now let’s have a look at the relationship between the time taken to deliver the food and the ratings of the delivery partner
figure = px.scatter(data_frame = data,
x="Delivery_person_Ratings",
y="Time_taken(min)",
size="Time_taken(min)",
color = "distance",
trendline="ols",
title = "Relationship Between Time Taken and Ratings")
figure.show()
In [16]:
# Now let’s have a look at the type of food ordered by the customer and whether the type of vehicle used by the delivery partner affects the delivery time or not
fig = px.box(data,
x="Type_of_vehicle",
y="Time_taken(min)",
color="Type_of_order")
fig.show()
Food Delivery Time Prediction Model¶
In [17]:
# Splitting the Dataset
from sklearn.model_selection import train_test_split
In [18]:
X = np.array(data[["Delivery_person_Age",
"Delivery_person_Ratings",
"distance"]])
y = np.array(data[["Time_taken(min)"]])
In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.10, random_state= 42)
In [21]:
# creating the LSTM neural network model
from keras.models import Sequential
from keras.layers import Dense, LSTM
In [22]:
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape = (X_train.shape[1], 1)))
model.add(LSTM(64, return_sequences= False))
model.add(Dense(25))
model.add(Dense(1))
model.summary()
C:\Users\Kamran\anaconda3\anaconda_new\Lib\site-packages\keras\src\layers\rnn\rnn.py:200: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 3, 128) │ 66,560 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ lstm_1 (LSTM) │ (None, 64) │ 49,408 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ (None, 25) │ 1,625 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 1) │ 26 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 117,619 (459.45 KB)
Trainable params: 117,619 (459.45 KB)
Non-trainable params: 0 (0.00 B)
In [24]:
# training the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=1, epochs=9)
Epoch 1/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 103s 2ms/step - loss: 75.7650 Epoch 2/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 107s 3ms/step - loss: 64.7484 Epoch 3/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 93s 2ms/step - loss: 60.8735 Epoch 4/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 91s 2ms/step - loss: 60.0647 Epoch 5/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 88s 2ms/step - loss: 60.2912 Epoch 6/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 89s 2ms/step - loss: 59.0659 Epoch 7/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 94s 2ms/step - loss: 59.0924 Epoch 8/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 88s 2ms/step - loss: 58.4746 Epoch 9/9 41033/41033 ━━━━━━━━━━━━━━━━━━━━ 89s 2ms/step - loss: 58.9557
Out[24]:
<keras.src.callbacks.history.History at 0x224194fdbe0>
In [25]:
# Now let’s test the performance of our model by giving inputs to predict the food delivery time:
print("Food Delivery Time Prediction")
a = int(input("Age of Delivery Partner: "))
b = float(input("Ratings of Previous Deliveries: "))
c = int(input("Total Distance: "))
features = np.array([[a, b, c]])
print("Predicted Delivery Time in Minutes = ", model.predict(features))
Food Delivery Time Prediction
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 456ms/step Predicted Delivery Time in Minutes = [[35.605858]]
In [ ]: